## SETUP FOR ANALYSIS ####

#NOTE: To run, create a "Figures" folder in the same folder as data and script

#install_github("naoki-egami/exr", dependencies = TRUE) # use if necessary
packages <- c("ggplot2","ggstance","gdata","gridExtra","stargazer","haven","foreign",
              "ivreg","lmtest","sandwich","exr","devtools","xtable")

ipak <- function(pkg){new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
ipak(packages)


rm(list = ls())

setwd("...")

# Read and Merge datasets

b = read.csv("BaselineMidline_Cleaned.csv",stringsAsFactors = F)
d = data.frame(read.spss("Endline_Cleaned.sav"))
h = data.frame(read.spss("Household_Cleaned.sav"))

d$id <- as.numeric(as.character(d$id))
h$id <- as.numeric(as.character(h$id))

d <- merge(d,b,by="id",all=T)
d <- merge(d,h,by="id",all=T)
d <- d[order(d$id),]
rm(b,h)

# Setup of main covariates

d$age <- 2018 - d$bQ2
d$male <- ifelse(d$bQ1=="Male",1,0)
d$edu <- (d$bQ13=="Secondary school: 10th Standard completed")+
  (d$bQ13=="Senior secondary school: 12th Standard completed")*2 +
  (d$bQ13=="Graduate: Degree completed")*3 +
  (d$bQ13=="Postgraduate: Degree completed")*3
d$employed <- ifelse(d$bQ14=="Yes",1,0)
d$married <- ifelse(d$bQ10=="Yes",1,0)
d$st <- ifelse(d$bQ4=="ST",1,0)

d$respond_m <- ifelse(!is.na(d$mQ10),1,0)
d$respond_e <- ifelse(!is.na(d$Q1a),1,0)
d$respond_h <- ifelse(!is.na(d$hQ1),1,0)
d$mig <- ifelse(as.numeric(d$Q18a)==3|as.numeric(d$Q18b)==3|
                  as.numeric(d$Q18c)==3|as.numeric(d$Q18d)==3,1,0)



## Functions for Main Analysis ####

#Create hypothetical treatment vectors

set.seed(032719)
reps <- 10000
treat_hyp <- matrix(NA,nrow=nrow(d),ncol=reps)
blocks <- unique(d$block[!is.na(d$block)])
for(i in 1:reps){
  newtreat <- rep(NA,nrow(d))
  for(j in 1:length(blocks)){
    if(sum(!is.na(d$block)&d$block==blocks[j])==1) newtreat[!is.na(d$block)&d$block==blocks[j]] <- rbinom(n = 1,size=1,prob = .5)
    else newtreat[!is.na(d$block)&d$block==blocks[j]] <- sample(d$treatment[!is.na(d$block)&d$block==blocks[j]])
  }
  treat_hyp[,i] <- newtreat
}

#Functions to calculate ATE and P-Value

indexer <- function(mat){
  newmat <- matrix(NA,nrow=nrow(mat),ncol=ncol(mat))
  for(j in 1:ncol(mat)){
    m <- mean(mat[d$treatment==0,j],na.rm=T)
    sd <- sd(mat[d$treatment==0,j],na.rm = T)
    if(sd==0){
      sd <- sd(mat[,j],na.rm = T) }
    newmat[,j] <- (mat[,j]-m)/sd
  }
  index_var <- rep(NA,nrow(mat))
  for(j in 1:nrow(mat)){
    index_var[j] <- ifelse(sum(!is.na(newmat[j,]))>0,
                           sum(newmat[j,],na.rm=T),NA)
  }
  sdev <- sd(index_var[d$treatment==0],na.rm=T)
  return(index_var/sdev)
}

regress_iv <- function(var, pre, two.sided){
  
  mean_c <- mean(var[d$treatment==0],na.rm=T)
  mean_t <- mean(var[d$treatment==1],na.rm=T)
  diff_in_means <- mean_t-mean_c
  
  reg_table <- summary(lm(var~d$treatment+pre))$coefficients
  ate <- reg_table[2,1]
  se_ols <- reg_table[2,2]
  n <- sum(!is.na(var)&!is.na(d$treatment))
  pval_ols <- ifelse(two.sided,reg_table[2,4],
                     ifelse(ate>0,reg_table[2,4]/2,1-reg_table[2,4]/2))
  
  diff_ests <- ate_ests <- rep(NA,length(reps))
  for(i in 1:reps){
    diff_ests[i] <- mean(var[treat_hyp[,i]==1],na.rm=T) - mean(var[treat_hyp[,i]==0],na.rm=T)
    ate_ests[i] <- summary(lm(var~treat_hyp[,i]+pre))$coefficients[2,1]
  }
  
  iv_table <- summary(ivreg(var~ pre | d$mig | d$treatment))$coefficients
  ate_iv <- iv_table[2,1]
  pval_iv <- ifelse(two.sided,iv_table[2,4],
                    ifelse(ate_iv>0,iv_table[2,4]/2,1-iv_table[2,4]/2))
  
  pval_simple <- ifelse(two.sided,mean(abs(diff_ests)>=abs(diff_in_means)),mean(diff_ests>=diff_in_means))
  pval_regress <- ifelse(two.sided,mean(abs(ate_ests)>=abs(ate)),mean(ate_ests>=ate))
  return(c(mean_c,mean_t,pval_simple,ate,se_ols,pval_regress,pval_ols,ate_iv,pval_iv,n))
}


regress <- function(var, pre, two.sided){
  
  mean_c <- mean(var[d$treatment==0],na.rm=T)
  mean_t <- mean(var[d$treatment==1],na.rm=T)
  diff_in_means <- mean_t-mean_c
  
  reg_table <- summary(lm(var~d$treatment+pre))$coefficients
  ate <- reg_table[2,1]
  se <- reg_table[2,2]
  n <- sum(!is.na(var)&!is.na(d$treatment))
  pval_ols <- ifelse(two.sided,reg_table[2,4],reg_table[2,4]/2)
  
  diff_ests <- ate_ests <- rep(NA,length(reps))
  for(i in 1:reps){
    diff_ests[i] <- mean(var[treat_hyp[,i]==1],na.rm=T) - mean(var[treat_hyp[,i]==0],na.rm=T)
    ate_ests[i] <- summary(lm(var~treat_hyp[,i]+pre))$coefficients[2,1]
  }
  
  pval_simple <- ifelse(two.sided,mean(abs(diff_ests)>=abs(diff_in_means)),mean(diff_ests>=diff_in_means))
  pval_regress <- ifelse(two.sided,mean(abs(ate_ests)>=abs(ate)),mean(ate_ests>=ate))
  return(c(mean_c,mean_t,pval_simple,ate,se,pval_regress,pval_ols,n))
}


multiple_results <- function(matr,pr,twosided=F){
  resultsmat <- matrix(nrow=ncol(matr),ncol=10)
  colnames(resultsmat) <- c("C.Mean","T.Mean","P_DIM","Coef","SE","P_OLS_RI","P_OLS","Coef_IV","P_IV","N")
  rownames(resultsmat) <- colnames(matr)
  for(i in 1:ncol(matr)){
    v <- matr[,i]
    p <- pr[,i]
    resultsmat[i,] <- regress_iv(v,p,two.sided=twosided)
    print(paste0(i," of ",ncol(matr)))
  }
  resultsmat <- round(resultsmat,3)
  return(resultsmat)
}


simple_results <- function(va,pre,twosided=F){
  resultsmat <- matrix(nrow=1,ncol=10)
  colnames(resultsmat) <- c("C.Mean","T.Mean","P_DIM","Coef","SE","P_OLS_RI","P_OLS","Coef_IV","P_IV","N")
  rownames(resultsmat) <- c("DV")
  resultsmat[1,] <- regress_iv(va,pre,two.sided = twosided)
  resultsmat <- round(resultsmat,3)
  return(resultsmat)
}


multiple_results_econ <- function(matr,pr,twosided=F){
  resultsmat <- matrix(ncol=ncol(matr),nrow = 5)
  colnames(resultsmat) <- colnames(matr)
  rownames(resultsmat) <- c("Coef","SE","P_RI","C.Mean","N")
  for(i in 1:ncol(matr)){
    v <- matr[,i]
    p <- pr[,i]
    fullresults <- regress(v,p,two.sided = twosided)
    resultsmat[,i] <- fullresults[c(4:6,1,8)]
    print(paste0(i," of ",ncol(matr)))
  }
  resultsmat <- round(resultsmat,3)
  return(resultsmat)
}

likely_regress_ols <- function(var, pre){
  
  reg_model <- lm(var~d$treatment*d$likely+pre)
  reg_table <- summary(reg_model)$coefficients
  vc <- vcov(reg_model)
  
  rest <- reg_table[2,1:2]
  likely <- c(reg_table[2,1] + reg_table[5,1], sqrt(diag(vc)[2]+diag(vc)[5]+2*vc[2,5]))
  diff <- reg_table[5,1:2]

  return(c(likely,rest,diff))
}

likely_results_ols <- function(matr,pr){
  resultsmat <- matrix(nrow=ncol(matr),ncol=6)
  colnames(resultsmat) <- c("ATE_Likely","SE_Likely","ATE_Others","SE_Others","Difference","SE")
  rownames(resultsmat) <- colnames(matr)
  for(i in 1:ncol(matr)){
    v <- matr[,i]
    p <- pr[,i]
    resultsmat[i,] <- likely_regress_ols(v,p)
    print(paste0(i," of ",ncol(matr)))
  }
  return(resultsmat)
}



## ANALYSIS ####

## Table 1: Basic Demographics of Respondents ##

covariates <- cbind(d$age,d$male,ifelse(d$edu>=2,1,0),d$employed,d$married,d$st)
cov.table <- matrix(NA,nrow=ncol(covariates)+1,ncol=3)
rownames(cov.table) <- c("N","Mean Age","Pct Male","Pct 12th Grade","Pct Employed","Pct Married","Pct ST")
colnames(cov.table) <- c("Baseline","Midline","Endline")
responses <- list(rep(1,nrow(d)),d$respond_m,d$respond_e)

for(j in 1:3){
  cov_j <- covariates[responses[[j]]==1,]
  cov_j <- cov_j[!is.na(cov_j[,1]),]
  cov.table[1,j] <- nrow(cov_j)
  for(i in 1:ncol(covariates)){
    cov.table[i+1,j] <- mean(cov_j[,i],na.rm=T)
  }
}
print(xtable(data.frame(round(cov.table,2))))



## Figure 1 and Table 2: Migration ##
# Also Figure D.5; Tables D.13, D.14,D.16 ##


mat_h1 <- pre_h1 <- matrix(NA,nrow=nrow(d),ncol=4)
colnames(mat_h1) <- c("Moved","Training","Offer","Moved in India")

mat_h1[,1] <- ifelse(as.numeric(d$Q18a)==3|as.numeric(d$Q18b)==3|
                       as.numeric(d$Q18c)==3|as.numeric(d$Q18d)==3,1,0)
mat_h1[,2] <- ifelse(as.numeric(d$Q15)==2,1,0)
mat_h1[,3] <- ifelse(as.numeric(d$Q16)>=3,1,0)
mat_h1[,4] <- ifelse(as.numeric(d$Q18a)==2|as.numeric(d$Q18b)==2|
                       as.numeric(d$Q18c)==2|as.numeric(d$Q18d)==2,1,0)
pre_h1[,1] <- pre_h1[,2] <- pre_h1[,3] <- pre_h1[,4] <- d$english

# Figure 1

mean_table <- data.frame(Group = character(length=ncol(mat_h1)*2), N =numeric(length=ncol(mat_h1)*2))
mean_table$Group <- rep(c("Treatment","Control"),each=ncol(mat_h1))
mean_table$Var <- rep(colnames(mat_h1),times=2)

for(i in 1:nrow(mean_table)){
  groupresponses <- mat_h1[d$treatment==ifelse(mean_table$Group[i]=="Treatment",1,0),ifelse(i>4,i-4,i)]
  mean_table$N[i] <- mean(groupresponses,na.rm = T)*100
}

mean_table$Var <- rep(c("Migrated outside India","Attended job training program","Received overseas job offer","Migrated within India"), times=2)
mean_table$Var <- factor(mean_table$Var, levels=c("Migrated within India","Attended job training program","Received overseas job offer", "Migrated outside India"))

pdf("Figures/Figure_1.pdf", width = 7, height = 1.5)
f2a <- ggplot(data=mean_table, aes(x=N, y=Var, color=Group,group=Group)) +
  ylab("") + xlab("")+  labs(color="",fill="") +
  theme(text=element_text(family="serif"),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.background = element_blank(),axis.line = element_line(colour = "black"),
        axis.text.y=element_text(colour="black"),axis.text.x=element_text(colour="black")) +
  scale_fill_grey(start=1,end=0) + scale_colour_grey(start=.6,end=0) +
  geom_point(stat="identity",size=2) + scale_x_continuous(limits = c(0,60))
f2a
dev.off()

# Table 2

table(d$Q5a)
comparison_mat <- d[,c("mQ5a","mQ5b","mQ5c","mQ5d","mQ5e")]
tab_2 <- matrix(NA, nrow=5,ncol=3)
rownames(tab_2) <- c("Better Paying Job","Skills Valued","Likely to be Promoted","Better Treatment","Less Discrimination")
colnames(tab_2) <- c("India","Gulf","DK/CS")
for(j in 1:ncol(comparison_mat)){
  for(i in 1:3){
    tab_2[j,i] <- mean(comparison_mat[,j]==i,na.rm=T)
  }}
round(tab_2*100)


#Figure D.5
loc_table <- data.frame(Answer=character(length = 30),Ans=numeric(length=30),
                        Date = character(length=30), Group = character(length=30), N =numeric(length=30))

loc_table$Ans <- rep(1:3,times=10)
loc_table$Answer <- rep(c("Mizoram","India","Abroad"),times=10)
loc_table$Group <- rep(c("Treatment","Control"),each=15)
loc_table$Date <- rep(rep(c("9/2018","7/2019","11/2019","2/2020","2/2021"),each=3),times=2)

for(i in 1:30){
  groupresponses <- d[d$treatment==ifelse(loc_table$Group[i]=="Treatment",1,0)&d$respond_e==1,
                      c("respond_e","Q18d","Q18c","Q18b","Q18a")]
  var <- groupresponses[,which(c("9/2018","7/2019","11/2019","2/2020","2/2021")==loc_table$Date[i])]
  loc_table$N[i] <- mean(as.numeric(var)==loc_table$Ans[i],na.rm = T)
}

loc_table$Answer <- factor(loc_table$Answer, levels=c("Mizoram","India","Abroad"))
loc_table$Date <- factor(loc_table$Date,levels=c("9/2018","7/2019","11/2019","2/2020","2/2021"))

pdf("Figures/Figure_D5.pdf", width = 5, height = 4)
ggplot(data=loc_table, aes(x=Date, y=N,group=Answer, color=Answer,fill=Answer)) +
  facet_grid(Group ~., scale="free_y", space = "free_y") +
  ylab("") + xlab("")+  labs(color="Location",fill="Location")+
  theme(text=element_text(family="serif")) + 
  scale_fill_grey(start=0.8,end=0) + scale_colour_grey(start=0.8,end=0) +
  geom_area(stat="identity") + scale_y_continuous(limits = c(0, 1)) + geom_vline(xintercept = loc_table$Date)
dev.off()

# Table D.13

results_moving <- multiple_results(mat_h1,pre_h1)
print(xtable(data.frame(results_moving),digits=3))
#Note: Direction on final hypothesis is opposite, so 1.000 p-value translates to .000 p-value.

# Where people moved
table(d$Q1b[strtrim(d$Q1a,5)=="India"],d$treatment[strtrim(d$Q1a,5)=="India"]) #in India
table(d$Q17[as.numeric(d$Q16)%in%6:7],d$treatment[as.numeric(d$Q16)%in%6:7]) # Overseas
table(d$Q1b[as.numeric(d$Q16)%in%6:7],d$treatment[as.numeric(d$Q16)%in%6:7]) # Overseas
unique(d$Q1a)

# Table D.14

table(d$Q16,d$treatment)

# Table D.16

efforts_mat <- matrix(NA,nrow=nrow(d),ncol=4)
efforts_mat[,2] <- ifelse(!is.na(as.numeric(d$mQ7)),ifelse(as.numeric(d$mQ7)%in%3:4,1,0),NA)
efforts_mat[,3] <- as.numeric(d$mQ39)
efforts_mat[,4] <- as.numeric(d$mQ45)
efforts_mat[,1] <- indexer(cbind(efforts_mat[,2],efforts_mat[,3],efforts_mat[,4]))
colnames(efforts_mat) <- c("Index","Passport","Laws","Employers")

pre_efforts <- cbind(d$english,d$english,d$english,d$english)
results_effort <- multiple_results(efforts_mat,pre_efforts)
print(xtable(data.frame(results_effort),digits=3))


## Figure 2: Redistribution Views ##
#Also includes Table D.15

d$bQ59 <- factor(d$bQ59,levels = c("Agree strongly","Agree somewhat","Neither agree nor disagree",
                                   "Disagree somewhat","Disagree strongly")) 
vars <- cbind(as.numeric(d$bQ59),d$mQ22,as.numeric(d$Q46),d$mQ20,as.numeric(d$Q44),d$mQ21,as.numeric(d$Q45))
colnames(vars) <- c("Pre_LowerTaxes","Mid_LowerTaxes","Post_LowerTaxes","Mid_BecomeRich","Post_BecomeRich",
                    "Mid_DontReduceInequality","Post_DontReduceInequality")

vars <- matrix(as.numeric(vars),ncol = ncol(vars))

vars[,1:3] <- matrix(6-vars[,1:3],ncol = 3)

mat_h5 <- cbind(indexer(vars[,c(3,5,7)]),vars[,3],vars[,5],vars[,7],
                indexer(vars[,c(2,4,6)]),vars[,2],vars[,4],vars[,6])
pre_h5 <- cbind(vars[,1],vars[,1],vars[,1],vars[,1],vars[,1],vars[,1],vars[,1],vars[,1])
colnames(mat_h5) <- c("Endline: Economic Index","Endline: Lower Taxes","Endline: Can Become Rich","Endline: Inequality Fine",
                      "Midline: Economic Index","Midline: Lower Taxes","Midline: Can Become Rich","Midline: Inequality Fine")

# Figure 2A

endline_h5 <- mat_h5[,c(1:4)]
redist_table <- data.frame(Var = c("Redistribution Index","Taxes","Mobility","Inequality"),
                           Coef = numeric(length = ncol(endline_h5)),SE=numeric(length = ncol(endline_h5)))
for(i in 1:ncol(endline_h5)){
  std_var <- indexer(cbind(mat_h5[,i]))
  redist_table[i,2:3] <- summary(lm(std_var~d$treatment+pre_h5[,i]))$coefficients[2,1:2]
}
redist_table$Upper <- redist_table$Coef+redist_table$SE*1.65
redist_table$Lower <- redist_table$Coef-redist_table$SE*1.65
redist_table$Group <- c("Index",rep("Component",times=3))
redist_table$Group <- factor(redist_table$Group,levels=c("Index","Component"))
redist_table$Var <- factor(redist_table$Var,levels=c("Mobility","Inequality","Taxes","Redistribution Index"))

pdf("Figures/Figure_2A.pdf", width = 5, height = 1.5)
f2b <- ggplot(redist_table[1:ncol(endline_h5),], aes(x = Coef, y = Var,color=Group)) +
  geom_point(size=2) + 
  scale_x_continuous(limits = c(-.6, .6)) +
  geom_errorbar(aes(xmin = Lower, xmax = Upper),width=0) + 
  geom_vline(xintercept = 0,color="black",linetype="dashed") + scale_colour_grey(start=0,end=.6) +
  theme_bw() + theme(text=element_text(family="serif"),legend.position="none",panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.background = element_blank(),axis.line = element_line(colour = "black"),
                     axis.text.y=element_text(colour="black"),axis.text.x=element_text(colour="black")) + 
  ylab("") + xlab("") + ggtitle("")
f2b
dev.off()


# Figure 2B

mat_hh <- cbind(5-as.numeric(d$hQ46),as.numeric(d$hQ45),as.numeric(d$hQ44))
mat_hh <- cbind(indexer(mat_hh),mat_hh)

colnames(mat_hh) <- c("Economic Index","Lower Taxes","Don't Reduce Inequality","Poor can advance")

redist_table <- data.frame(Var = c("Redistribution Index","Taxes","Mobility","Inequality"),
                           Coef = numeric(length = ncol(mat_hh)),SE=numeric(length = ncol(endline_h5)))

for(i in 1:ncol(mat_hh)){
  std_var <- indexer(cbind(mat_hh[,i]))
  redist_table[i,2:3] <- summary(lm(std_var~d$treatment+pre_h5[,i]))$coefficients[2,1:2]
}
redist_table$Upper <- redist_table$Coef+redist_table$SE*1.65
redist_table$Lower <- redist_table$Coef-redist_table$SE*1.65
redist_table$Group <- c("Index",rep("Component",times=3))
redist_table$Group <- factor(redist_table$Group,levels=c("Index","Component"))
redist_table$Var <- factor(redist_table$Var,levels=c("Mobility","Inequality","Taxes","Redistribution Index"))

pdf("Figures/Figure_2B.pdf", width = 5, height = 1.5)
f2b <- ggplot(redist_table[1:ncol(endline_h5),], aes(x = Coef, y = Var,color=Group)) +
  geom_point(size=2) + 
  scale_x_continuous(limits = c(-.6, .6)) +
  geom_errorbar(aes(xmin = Lower, xmax = Upper),width=0) + 
  geom_vline(xintercept = 0,color="black",linetype="dashed") + scale_colour_grey(start=0,end=.6) +
  theme_bw() + theme(text=element_text(family="serif"),legend.position="none",panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.background = element_blank(),axis.line = element_line(colour = "black"),
                     axis.text.y=element_text(colour="black"),axis.text.x=element_text(colour="black")) + 
  ylab("") + xlab("") + ggtitle("")
f2b
dev.off()


#Table D.15

mat_redist <- cbind(mat_h5,mat_hh)
pre_redist <- cbind(pre_h5,pre_h5[,1:4])

results_redistribution <- multiple_results(mat_redist,pre_redist)
print(xtable(data.frame(results_redistribution),digits=3))



## Table 3 and Table 4: Economic Effects ##
#Also Table D.17

d$pre_material <- indexer(as.matrix((d[,c("bQ22_1","bQ22_2","bQ22_3","bQ22_4","bQ22_5","bQ22_6")])))
d$post_material <- indexer(cbind(as.numeric(d$Q8.1),as.numeric(d$Q8.2),as.numeric(d$Q8.3),
                               as.numeric(d$Q8.4),as.numeric(d$Q8.5),as.numeric(d$Q8.6)))
by(d$pre_material[!is.na(d$post_material)],INDICES=d$treatment[!is.na(d$post_material)],FUN=mean)
by(d$post_material[!is.na(d$post_material)],INDICES=d$treatment[!is.na(d$post_material)],FUN=mean)

d$pre_income <- (d$bQ21=="Less than Rs. 5,000") + (d$bQ21=="Rs. 5,001 - Rs. 10,000") *2 +
  (d$bQ21=="Rs. 10,001 - Rs. 20,000") *3 + (d$bQ21=="Rs. 20,001 - Rs. 30,000") *4 +
  (d$bQ21=="Rs. 30,001 - Rs. 40,000") *5 + (d$bQ21=="Rs. 40,001 - Rs. 50,000") *6 +
  (d$bQ21=="Rs. 50,000 - Rs. 100,000") *7 + (d$bQ21=="Rs. 100,000 and above") *8
d$post_income <- as.numeric(d$Q7)

d$pre_employed <- ifelse(d$bQ14=="Yes",1,0)
d$post_employed <- ifelse(as.numeric(d$Q3)==1,1,0)

d$pre_wages <- ifelse(d$pre_employed==0,0,as.numeric(d$bQ17))
d$post_wages <- ifelse(d$post_employed==0,0,as.numeric(d$Q5))

mat_h2 <- cbind(indexer(cbind(d$post_income,d$post_material,d$post_employed,d$post_wages)),d$post_employed,d$post_wages,d$post_income,d$post_material)
colnames(mat_h2) <- c("Endline: Economic Index","Employed","Wages","Family Income","Material Goods")
pre_h2 <- cbind(indexer(cbind(d$pre_income,d$pre_material,d$pre_employed,d$pre_wages)),d$pre_employed,d$pre_wages,d$pre_income,d$pre_material)

#Table 3
results_income <- multiple_results_econ(mat_h2,pre_h2)
print(xtable(data.frame(results_income),digits = 3))

#Table D.17
results_income <- multiple_results(mat_h2,pre_h2)
print(xtable(data.frame(results_income),digits = 3))

#Non-Experimental: Comparing wages / incomes of international movers with domestic movers
movers <- ifelse(as.numeric(d$Q18a)==3 | as.numeric(d$Q18b)==3,"International",
                 ifelse(as.numeric(d$Q18a)==2 | as.numeric(d$Q18b)==2,"Domestic","None"))
by(d$post_wages,INDICES=movers,FUN=mean,na.rm=T)

# Table 4: Remittances
d$remittances <- ifelse(strtrim(d$Q65,1)==" ",0, as.numeric(d$Q65))
table(d$remittances,d$treatment)
mean(d$remittances[mat_h1[,1]==1],na.rm=T) # mean amount sent home
table(d$remittances[mat_h1[,1]==1])

mat_remit <- cbind(ifelse(!is.na(d$hQ15.G_PERSON_1),d$hQ15.G_PERSON_1, ifelse(d$respond_h==1,0,NA)),
                   ifelse(!is.na(d$hQ15.H_PERSON_1),d$hQ15.H_PERSON_1, ifelse(d$respond_h==1,0,NA)))
mat_remit <- cbind(rowSums(mat_remit),mat_remit)/12 # make it monthly
pre_remit <- cbind(d$pre_income,d$pre_income,d$pre_income)
colnames(mat_remit) <- c("Total Remittances","In Cash","In Gifts")

remit <- cbind(d$remittances, mat_remit[,1])
results_remit <- multiple_results_econ(remit,pre_remit)
print(xtable(data.frame(results_remit),digits = 3))



## Table D.18: Economic Confidence

vars<- cbind(d$bQ63,d$mQ48,d$mQ8,d$mQ9,d$mQ10,d$Q10,d$Q11,d$Q12,d$Q13)
colnames(vars) <- c("Pre_FutureAdvancement","Mid_FutureAdvancement","Mid_NextJobSalary","Mid_FamilyNextYear","Mid_ParentsAge",
                    "End_FutureAdvancement","End_NextJobSalary","End_FamilyNextYear","End_ParentsAge")
vars[,1] <- factor(vars[,1],levels = c("Strongly agree","Somewhat agree","Neither agree nor disagree",
                                       "Somewhat disagree","Strongly disagree")) 
vars <- matrix(as.numeric(vars),ncol = ncol(vars))
vars <- matrix(6-vars,ncol=ncol(vars))

mat_h4 <- cbind(indexer(vars[,6:9]),vars[,6],vars[,7],vars[,8],vars[,9])
pre_h4 <- cbind(vars[,1],vars[,1],vars[,1],vars[,1],vars[,1])
colnames(mat_h4) <- c("Endline: Confidence Index","Endline: Will Advance Professionally","Endline: Next Job Salary","Endline: Next Year Family Situation","Endline: Better off than parents")

results_confidence <- multiple_results(mat_h4,pre_h4)
print(xtable(data.frame(results_confidence),digits=3))



## Figure 3 and Table D.19: Family Planning

d$pre_marriage <- as.numeric(d$bQ74)
d$mid_marriage <- as.numeric(d$mQ12)
d$post_marriage <- as.numeric(d$Q48)

d$pre_childage <- as.numeric(d$bQ75)
d$mid_childage <- ifelse(d$mQ14b==0,NA,as.numeric(d$mQ14b))
d$post_childage <- as.numeric(d$Q49)

mat_h3 <- cbind(indexer(cbind(d$post_marriage,d$post_childage)),d$post_marriage,d$post_childage,
                indexer(cbind(d$mid_marriage,d$mid_childage)),d$mid_marriage,d$mid_childage)
pre_h3 <- cbind(indexer(cbind(d$pre_marriage,d$pre_childage)),d$pre_marriage,d$pre_childage,
                indexer(cbind(d$pre_marriage,d$pre_childage)),d$pre_marriage,d$pre_childage)
colnames(mat_h3) <- c("Endline: Life Plans Index","Marriage Age","Childbearing Age",
                      "Midline: Life Plans Index","Marriage Age","Childbearing Age")

#difference in means by gender
by(data=d$post_marriage, INDICES=list(d$treatment,d$male),FUN=mean,na.rm=T)
by(data=d$post_childage, INDICES=list(d$treatment,d$male),FUN=mean,na.rm=T)

#Table D.19
results_plans <- multiple_results(mat_h3,pre_h3)
print(xtable(data.frame(results_plans),digits=3))

#Actual marriages
got_married <- ifelse(!is.na(as.numeric(d$Q47)) & as.numeric(d$Q47)==3 & d$bQ10=="No, never married",1,
                      ifelse(is.na(as.numeric(d$Q47)),NA,0))
table(got_married,d$treatment)
t.test(got_married[d$treatment==1],got_married[d$treatment==0])

# Figure 3

vars<- cbind(d$pre_marriage,d$mid_marriage,d$post_marriage,d$pre_childage,d$mid_childage,d$post_childage)
colnames(vars) <- c("Pre_Marriage","Mid_Marriage","Post_Marriage","Pre_childAge","Mid_ChildAge","Post_ChildAge")
vars <- cbind(vars,vars)
mean_table <- data.frame(Date = character(length=ncol(vars)), Group = character(length=ncol(vars)), N =numeric(length=ncol(vars)))
mean_table$Group <- rep(c("Treatment","Control"),each=ncol(vars)/2)
mean_table$Date <- rep(c("Pre","Mid","Post"),times=ncol(vars)/3)
mean_table$Var <- rep(c(rep("Marriage Age",3),rep("Childbearing Age",3)),times=ncol(vars)/6)

for(i in 1:nrow(mean_table)){
  groupresponses <- vars[d$treatment==ifelse(mean_table$Group[i]=="Treatment",1,0),]
  var <- groupresponses[,i]
  mean_table$N[i] <- mean(as.numeric(var),na.rm = T)
}
mean_table$Date <- factor(mean_table$Date,levels=c("Pre","Mid","Post"))
mean_table$Var <- factor(mean_table$Var,levels=c("Marriage Age","Childbearing Age"))

pdf("Figures/Figure_3.pdf", width = 6, height = 3)
ggplot(data=mean_table, aes(x=Date, y=N, color=Group,group=Group)) +
  facet_wrap(Var~.,ncol=2) +
  ylab("") + xlab("")+  labs(color="Group",fill="Group",shape="Group") +
  theme(text=element_text(family="serif")) + 
  scale_fill_grey(start=0.6,end=0) + scale_colour_grey(start=.6,end=0) +
  geom_line(stat="identity",size=2) + geom_point(stat="identity",size=4) + scale_y_continuous(limits = c(29,34))
dev.off()


## Figure 4: Redistribution Effects over Time ##

indexes_graph <- data.frame(Time=c("Pre-Selection","Post-Selection","Post-Migration"),
                            Diff=numeric(length=3),SE=numeric(length=3))

index_vars <- cbind(indexer(as.matrix(vars[,1])),mat_h5[,5],mat_h5[,1])

for(i in 1:3){
  indexes_graph[i,2:3] <- summary(lm(index_vars[,i]~d$treatment))$coefficients[2,1:2]
}
indexes_graph$Upper <- indexes_graph$Diff + indexes_graph$SE*1.65
indexes_graph$Lower <- indexes_graph$Diff - indexes_graph$SE*1.65
indexes_graph$Time <- factor(indexes_graph$Time,levels = c("Post-Migration","Post-Selection","Pre-Selection"))

pdf("Figures/Figure_4.pdf", width = 5.5, height = 1.8)
ggplot(indexes_graph, aes(x = Diff, y = Time)) +
  geom_point(size=2) + 
  scale_x_continuous(limits = c(-.6, .6)) +
  geom_errorbar(aes(xmin = Lower, xmax = Upper),width=0) + 
  geom_vline(xintercept = 0,color="black",linetype="dashed") +
  theme_bw() + theme(text=element_text(family="serif"),legend.position="none") + 
  ylab("") + xlab("Difference between Treatment and Control Attitudes") + ggtitle("")
dev.off()


### TABLE D20: Benchmarking Effect size

# Within our sample

summary(lm(mat_h5[,1]~ifelse(pre_h2[,1]>=0,1,0)))
summary(lm(mat_h5[,1]~ifelse(pre_h5[,1]>=4,1,0)))

# WVS

wvs <- read_dta("wvs_2.dta")
country_name <- c('Pakistan','Bangladesh','Indonesia','United States')
country <- c(62, 7, 36, 87)
wvs <- wvs[wvs$B_COUNTRY%in%country, ]
table(wvs$Q97)
union <- ifelse(wvs$Q97<5,NA,ifelse(wvs$Q97>5,1,0))
male <- ifelse(wvs$Q260<5,NA,ifelse(wvs$Q260==5,1,0))
secondary_ed <- ifelse(wvs$Q275<6,NA,ifelse(wvs$Q275>=9,1,0))
redist_qs <- cbind(ifelse(wvs$Q106<5,NA,wvs$Q106-4),
                   ifelse(wvs$Q108<5,NA,wvs$Q108-4),
                   ifelse(wvs$Q110<5,NA,15-wvs$Q110))
results_mat <- data.frame(Country=country_name,Union=numeric(length=length(country)),
                          Gender=numeric(length=length(country)),Education=numeric(length=length(country)))

for(i in 1:length(country)){
  y <- indexer(redist_qs[wvs$B_COUNTRY==country[i],])
  results_mat[i,2] <- summary(lm(y~union[wvs$B_COUNTRY==country[i]]))$coefficients[2,1]
  results_mat[i,3] <- summary(lm(y~male[wvs$B_COUNTRY==country[i]]))$coefficients[2,1]
  results_mat[i,4] <- summary(lm(y~secondary_ed[wvs$B_COUNTRY==country[i]]))$coefficients[2,1]
}
results_mat

rm(wvs)




#### MECHANISM TESTS ####


## Table E.21: Institutional Trust ##

#Variables
mat_h6 <- pre_h6 <- matrix(NA,nrow=nrow(d),ncol=7)
trust_qs <- cbind(as.numeric(d$Q28a),as.numeric(d$Q28b),as.numeric(d$Q28c),
                  as.numeric(d$Q29a),as.numeric(d$Q29b),as.numeric(d$Q29c))
mat_h6 <- cbind(indexer(trust_qs),trust_qs)

pre_trust <- d[,c("bQ33a","bQ33b","bQ33c")]
pre_trust <- matrix((pre_trust=="A great deal of trust")*4 +
  (pre_trust=="Some trust")*3 + (pre_trust=="No answer")*2 +
  (pre_trust=="A little trust")*2 + (pre_trust=="No trust at all")*1,ncol=3)
pre_h6 <- cbind(indexer(pre_trust),pre_trust,pre_trust)

colnames(mat_h6) <- colnames(pre_h6) <- c("Institutions Index","Trust: National","Trust: State","Trust: Local",
                                          "Capable: National","Capable: State","Capable: Local")

results_h6 <- multiple_results_econ(mat_h6,pre_h6,twosided = T)
print(xtable(data.frame(results_h6),digits=3))


## Table E.22 and Table E.23: Job Training Effects ##

d$uptake <- mat_h1[,2]
d$moved <- mat_h1[,1]
table(d$uptake, d$treatment)

training_1 <- lm(mat_h2[d$treatment==0,1]~uptake+age+male+employed+married+edu+st+pre_h2[d$treatment==0,1],data=d[d$treatment==0,])
training_2 <- lm(mat_h3[d$treatment==0,1]~uptake+age+male+employed+married+edu+st+pre_h3[d$treatment==0,1],data=d[d$treatment==0,])
training_3 <- lm(mat_h4[d$treatment==0,1]~uptake+age+male+employed+married+edu+st+pre_h4[d$treatment==0,1],data=d[d$treatment==0,])
training_4 <- lm(mat_h5[d$treatment==0,1]~uptake+age+male+employed+married+edu+st+pre_h5[d$treatment==0,1],data=d[d$treatment==0,])
#Table E.22: Control Group
stargazer(training_1,training_2,training_3,training_4)

# On treatment group

training_1 <- lm(mat_h2[d$treatment==1,1]~uptake+moved+age+male+employed+married+edu+st+pre_h2[d$treatment==1,1],data=d[d$treatment==1,])
training_2 <- lm(mat_h3[d$treatment==1,1]~uptake+moved+age+male+employed+married+edu+st+pre_h3[d$treatment==1,1],data=d[d$treatment==1,])
training_3 <- lm(mat_h4[d$treatment==1,1]~uptake+moved+age+male+employed+married+edu+st+pre_h4[d$treatment==1,1],data=d[d$treatment==1,])
training_4 <- lm(mat_h5[d$treatment==1,1]~uptake+moved+age+male+employed+married+edu+st+pre_h5[d$treatment==1,1],data=d[d$treatment==1,])
#Table E.23: Treatment Group
stargazer(training_1,training_2,training_3,training_4)


## Table E.24: Likely Compliers Analysis ##

ols_vars <- cbind(mat_h1[,1],mat_h2[,1],mat_h5[,1])
ols_pre <- cbind(pre_h1[,1],pre_h2[,1],pre_h5[,1])
colnames(ols_vars) <- c("Migrated","Econ Status","Support for Redistribution")
results_compliers <- likely_results_ols(ols_vars,ols_pre)
print(xtable(data.frame(results_compliers),digits=3))


#### ATTRITION BIAS CHECKS ####

respond_e2 <- c(d$respond_e,0,0,0)
respond_m2 <- c(d$respond_m,0,0,0)
respond_h2 <- c(d$respond_h,0,0,0)
treat2 <- c(d$treatment,1,1,1)

## Table B.2: Balance Table ##

#F-Test on respondents' baseline covariates among all subjects (nothing is predictive)
#(still nothing is predictive)
balance.model1 <- lm(d$treatment ~ age+male+edu+employed+st+married+english+
                       pre_h2[,1]+pre_h4[,1]+pre_h5[,1],data=d)
summary(balance.model1)
f_1 <- summary(balance.model1)$fstatistic[1]
f_reps <- rep(NA,reps)
for(i in 1:reps){
  newmod <- lm(treat_hyp[,i] ~ d$age+d$male+d$edu+d$employed+d$st+d$married+d$english+
                 pre_h2[,1]+pre_h4[,1]+pre_h5[,1])
  f_reps[i]<-summary(newmod)$fstatistic[1]
}
mean(f_reps>=f_1)

#F-Stat p-value

#F-Test on respondents' baseline covariates among endline responders (still nothing is predictive)
balance.model2 <- lm(treatment ~ age+male+edu+employed+st+married+english+
                       pre_h2[d$respond_e==1,1]+pre_h4[d$respond_e==1,1]+pre_h5[d$respond_e==1,1],data=d[d$respond_e==1,])
summary(balance.model2)
f_2 <- summary(balance.model2)$fstatistic[1]
f_reps <- rep(NA,reps)
for(i in 1:reps){
  newmod <- lm(treat_hyp[d$respond_e==1,i] ~ age+male+edu+employed+st+married+english+
                 pre_h2[d$respond_e==1,1]+pre_h4[d$respond_e==1,1]+pre_h5[d$respond_e==1,1],data=d[d$respond_e==1,])
  f_reps[i]<-summary(newmod)$fstatistic[1]
}
mean(f_reps>=f_2)

#F-Test on respondents' baseline covariates among midline responders (still nothing is predictive)
balance.model3 <- lm(treatment ~ age+male+edu+employed+st+married+english+
                       pre_h2[d$respond_m==1,1]+pre_h4[d$respond_m==1,1]+pre_h5[d$respond_m==1,1],data=d[d$respond_m==1,])
summary(balance.model3)
f_3 <- summary(balance.model3)$fstatistic[1]
f_reps <- rep(NA,reps)
for(i in 1:reps){
  newmod <- lm(treat_hyp[d$respond_m==1,i] ~ age+male+edu+employed+st+married+english+
                 pre_h2[d$respond_m==1,1]+pre_h4[d$respond_m==1,1]+pre_h5[d$respond_m==1,1],data=d[d$respond_m==1,])
  f_reps[i]<-summary(newmod)$fstatistic[1]
}
mean(f_reps>=f_3)

#F-Test on respondents' baseline covariates among household responders
balance.model4 <- lm(treatment ~ age+male+edu+employed+st+married+english+
                       pre_h2[d$respond_h==1,1]+pre_h4[d$respond_h==1,1]+pre_h5[d$respond_h==1,1],data=d[d$respond_h==1,])
summary(balance.model4)
f_4 <- summary(balance.model4)$fstatistic[1]
f_reps <- rep(NA,reps)
for(i in 1:reps){
  newmod <- lm(treat_hyp[d$respond_h==1,i] ~ age+male+edu+employed+st+married+english+
                 pre_h2[d$respond_h==1,1]+pre_h4[d$respond_h==1,1]+pre_h5[d$respond_h==1,1],data=d[d$respond_h==1,])
  f_reps[i]<-summary(newmod)$fstatistic[1]
}
mean(f_reps>=f_4)

stargazer(balance.model1,balance.model3,balance.model2,balance.model4)


## Table B.3: Response Rate by Treatment Group ##

t.test(respond_e2[treat2==1],respond_e2[treat2==0]) # endline
t.test(respond_m2[treat2==1],respond_m2[treat2==0]) # midline
t.test(respond_h2[treat2==1],respond_h2[treat2==0]) # household

t_hat_e <- mean(respond_e2[treat2==1],na.rm = T)-mean(respond_e2[treat2==0],na.rm = T)
t_hat_m <- mean(respond_m2[treat2==1],na.rm = T)-mean(respond_m2[treat2==0],na.rm = T)
t_hat_h <- mean(respond_h2[treat2==1],na.rm = T)-mean(respond_h2[treat2==0],na.rm = T)
t_ests_e <- t_ests_m <- t_ests_h <- rep(NA,reps)
for(i in 1:reps){
  t_ests_e[i] <- mean(respond_e2[treat_hyp[,i]==1],na.rm=T) - mean(respond_e2[treat_hyp[,i]==0],na.rm=T) 
  t_ests_m[i] <- mean(respond_m2[treat_hyp[,i]==1],na.rm=T) - mean(respond_m2[treat_hyp[,i]==0],na.rm=T) 
  t_ests_h[i] <- mean(respond_h2[treat_hyp[,i]==1],na.rm=T) - mean(respond_h2[treat_hyp[,i]==0],na.rm=T) 
}
mean(abs(t_hat_e)<=abs(t_ests_e))
mean(abs(t_hat_m)<=abs(t_ests_m))
mean(abs(t_hat_h)<=abs(t_ests_h))


## Table B.4: Predictors of Midlines response

attrition_m1 <- lm(d$respond_m ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english)
attrition_m2 <- lm(d$respond_m ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1])
attrition_m3 <- lm(d$respond_m ~ d$treatment*(d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1]))

summary(attrition_m1)
summary(attrition_m2)
summary(attrition_m3)
stargazer(attrition_m1,attrition_m2,attrition_m3)

## Table B.5: Predictors of Endline Response

attrition_e1 <- lm(d$respond_e ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english)
attrition_e2 <- lm(d$respond_e ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1])
attrition_e3 <- lm(d$respond_e ~ d$treatment*(d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1]))

summary(attrition_e1)
summary(attrition_e2)
summary(attrition_e3)
stargazer(attrition_e1,attrition_e2,attrition_e3)

## Table B.6: Predictors of Household Response
attrition_h1 <- lm(d$respond_h ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english)
attrition_h2 <- lm(d$respond_h ~ d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1])
attrition_h3 <- lm(d$respond_h ~ d$treatment*(d$age+d$edu+d$st+d$employed+d$married+d$male+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1]))

summary(attrition_h1)
summary(attrition_h2)
summary(attrition_h3)
stargazer(attrition_h1,attrition_h2,attrition_h3)


## Table B.7: Individual-Level Results with Controls



e_complete <- ifelse(!is.na(d$age+d$male+d$edu+d$employed+d$st+d$married+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1])&d$respond_e==1,1,0)
e_m1 <- lm(mat_h5[e_complete==1,1]~treatment+age+male+edu+employed+st+married+english+pre_h2[e_complete==1,1]+pre_h4[e_complete==1,1]+pre_h5[e_complete==1,1],data=d[e_complete==1,])
e_m2 <- lm(mat_h5[e_complete==1,2]~treatment+age+male+edu+employed+st+married+english+pre_h2[e_complete==1,1]+pre_h4[e_complete==1,1]+pre_h5[e_complete==1,1],data=d[e_complete==1,])
e_m3 <- lm(mat_h5[e_complete==1,3]~treatment+age+male+edu+employed+st+married+english+pre_h2[e_complete==1,1]+pre_h4[e_complete==1,1]+pre_h5[e_complete==1,1],data=d[e_complete==1,])
e_m4 <- lm(mat_h5[e_complete==1,4]~treatment+age+male+edu+employed+st+married+english+pre_h2[e_complete==1,1]+pre_h4[e_complete==1,1]+pre_h5[e_complete==1,1],data=d[e_complete==1,])

stargazer(e_m1,e_m2,e_m3,e_m4)


# Table B.8: Household Results with Controls

hh_complete <- ifelse(!is.na(d$age+d$male+d$edu+d$employed+d$st+d$married+d$english+pre_h2[,1]+pre_h4[,1]+pre_h5[,1])&d$respond_h==1,1,0)
hh_m1 <- lm(mat_hh[hh_complete==1,1]~treatment+age+male+edu+employed+st+married+english+pre_h2[hh_complete==1,1]+pre_h4[hh_complete==1,1]+pre_h5[hh_complete==1,1],data=d[hh_complete==1,])
hh_m2 <- lm(mat_hh[hh_complete==1,2]~treatment+age+male+edu+employed+st+married+english+pre_h2[hh_complete==1,1]+pre_h4[hh_complete==1,1]+pre_h5[hh_complete==1,1],data=d[hh_complete==1,])
hh_m3 <- lm(mat_hh[hh_complete==1,3]~treatment+age+male+edu+employed+st+married+english+pre_h2[hh_complete==1,1]+pre_h4[hh_complete==1,1]+pre_h5[hh_complete==1,1],data=d[hh_complete==1,])
hh_m4 <- lm(mat_hh[hh_complete==1,4]~treatment+age+male+edu+employed+st+married+english+pre_h2[hh_complete==1,1]+pre_h4[hh_complete==1,1]+pre_h5[hh_complete==1,1],data=d[hh_complete==1,])

stargazer(hh_m1,hh_m2,hh_m3,hh_m4)


## Figure B.3: Endline Results, Sensitivity Analysis for Attrition Bias ##

table(d$respond_e,d$treatment,useNA = "always") # we need to remove 26 of the 165 control units to even out treatment and control

sensitivity <- data.frame(Scenario=character(length=7),ATE=numeric(length=7),SE=numeric(length=7),Cor=numeric(length=7))
sensitivity$Scenario <- c("Max Estimated","","Same"," ","Min Estimated","Zero","Opposite")
sensitivity$Scenario <- factor(sensitivity$Scenario,levels=sensitivity$Scenario)

set.seed(10232023)
scenario_c <- rnorm(77,mean=0,sd=1)
epsilon_t <- rnorm(64,mean=0,sd=1)
scenario_t <- cbind(0.5+epsilon_t, # max estimated heterogeneous effect
                     0.49+epsilon_t, # effect estimated among migrants
                     0.35+epsilon_t, # ATE estimated overall
                     0.29+epsilon_t, # effect estimated among non-migrants
                     0.2+epsilon_t, # min estimated heterogeneous effect
                     0+epsilon_t, # no effect
                     -.35+epsilon_t) # opposite direction ATE

for(i in 1:7){
  y <- mat_h5[,1]
  x <- pre_h5[,1]
  z <- d$treatment
  y[is.na(y)&!(is.na(d$treatment))&d$treatment==0] <- scenario_c
  y[is.na(y)&!(is.na(d$treatment))&d$treatment==1] <- scenario_t[,i]
  sensitivity[i,2:3] <- summary(lm(y~z+x))$coefficients[2,1:2]
  sensitivity[i,4] <- cor(y[z==1],d$respond_e[z==1],use = "complete.obs")
}

sensitivity$Upper <- sensitivity$ATE+1.65*sensitivity$SE
sensitivity$Lower <- sensitivity$ATE-1.65*sensitivity$SE
sensitivity

pdf("Figures/Figure_B3.pdf", width = 5, height = 3)
ggplot(sensitivity, aes(y = ATE, x = Scenario,label=Scenario)) +
  geom_point(size=2) +
  scale_y_continuous(limits = c(-.2, .8)) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper),width=0) + 
  geom_hline(yintercept = 0,color="black",linetype="dashed") +
  scale_x_discrete(labels = function(x) 
    stringr::str_wrap(sensitivity$Scenario, width = 15))+
  theme_bw() + theme(text=element_text(family="serif"),legend.position="none") + 
  ylab("Hypothetical ATE") + xlab("Predicted ATE among Attriters") + ggtitle("")
dev.off()


# Figure B.4: Household Results, Sensitivity Analysis for Differential Attrition ##

table(!is.na(mat_hh[,1]),d$treatment,useNA = "always") # we need to remove 26 of the 165 control units to even out treatment and control

sensitivity <- data.frame(Scenario=character(length=7),ATE=numeric(length=7),SE=numeric(length=7))
sensitivity$Scenario <- c("Most Pro- Redistribution",""," ","None","  ","   ","Most Anti- Redistribution")
sensitivity$Scenario <- factor(sensitivity$Scenario,levels=sensitivity$Scenario)

set.seed(9202023)
scenarios <- cbind(c(rep(0,26),rep(1,139)),# G. 26 most conservative respondents removed
                   c(sample(c(rep(0,26),rep(1,56))),rep(1,83)), # F. 26 respondents removed from 50th percentile and above
                   c(sample(c(rep(0,26),rep(1,98))),rep(1,41)), # E. 26 respondents removed from 25th percentile and above
                   rep(1,165), #D. 26 respondents removed from entire redistribution
                   c(rep(1,41),sample(c(rep(0,26),rep(1,98)))), #C. 26 respondents removed from 75th percentile and below
                   c(rep(1,83),sample(c(rep(0,26),rep(1,56)))), #B. 26 respondents removed from 50th percentile and below
                   c(rep(1,139),rep(0,26))) # A. 26 most progressive respondents removed
i=1
for(i in 1:7){
  z <- c(rep(1,139),rep(0,139))
  if(i==4) z <- c(rep(1,139),rep(0,165))
  
  y1 <- cbind(mat_hh[!is.na(d$treatment)&d$treatment==1,1],pre_h5[!is.na(d$treatment)&d$treatment==1,1])
  y1 <- y1[!is.na(y1[,1]),]
  y2 <- cbind(mat_hh[d$treatment==0,1],pre_h5[d$treatment==0,1])
  y2 <- y2[!is.na(y2[,1]),]
  y2 <- y2[order(y2[,1],decreasing = F),]
  y2 <- y2[scenarios[,i]==1,]
  y <- rbind(y1,y2)
  sensitivity[i,2:3] <- summary(lm(y[,1]~z+y[,2]))$coefficients[2,1:2]
}

sensitivity$Upper <- sensitivity$ATE+1.65*sensitivity$SE
sensitivity$Lower <- sensitivity$ATE-1.65*sensitivity$SE
sensitivity

pdf("Figures/Figure_B4.pdf", width = 5, height = 3)
ggplot(sensitivity, aes(y = ATE, x = Scenario,label=Scenario)) +
  geom_point(size=2) +
  scale_y_continuous(limits = c(-.8, .3)) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper),width=0) + 
  geom_hline(yintercept = 0,color="black",linetype="dashed") +
  scale_x_discrete(labels = function(x) 
    stringr::str_wrap(sensitivity$Scenario, width = 15))+
  theme_bw() + theme(text=element_text(family="serif"),legend.position="none") + 
  ylab("ATE Among Hypothetical Sample") + xlab("Control Respondents Removed") + ggtitle("")
dev.off()


#### EXTERNAL VALIDITY TESTS ####

## Table G.27: Heterogeneous Effects ##

main_vars <- cbind(d$redist_self,d$redist_hh,d$econ_standing)
main_var_names <- c("Redistribution Views","HH Redistribution Views","Economic Standing")
d$redist_self <- mat_h5[,1]
d$redist_hh <- mat_hh[,1]
d$econ_standing <- mat_h2[,1]
d$christian <- ifelse(d$bQ3=="Christian",1,0)

cov_vars <- cbind(covariates[,c(1:4,6)],ifelse(d$bQ3=="Christian",1,0),d$pre_wages,d$pre_income,d$pre_material)

het_results <- het_cor <- matrix(NA, nrow = ncol(cov_vars),ncol=ncol(main_vars))
colnames(het_results) <- colnames(het_cor) <- main_var_names
rownames(het_results) <- rownames(het_cor) <- c("Age","Gender","Education","Employed","ST","Christian","Wages","Family Income","Assets")

for(i in 1:nrow(het_results)){
  for(j in 1:ncol(het_results)){
    covar <- cov_vars[,i]
    outcome <- main_vars[,j]
    het_results[i,j] <- summary(lm(outcome~d$treatment*covar))$coefficients[4,3]
    het_cor[i,j] <- cor(outcome[d$treatment==1],covar[d$treatment==1],use = "complete.obs")
  }
}
print(xtable(het_results,digits=2))


## All Pre-Treatment Covariates (Egami et al)

require(devtools)
#install_github("naoki-egami/exr", dependencies = TRUE) # use if necessary
require(exr)

heterogeneous_effects <- data.frame(Redist_Self=numeric(length=nrow(d)),Redist_HH=numeric(length=nrow(d)),
                                    Econ_Standing=numeric(length=nrow(d)))

for(i in 1:ncol(heterogeneous_effects)){
  v <- main_vars[,i]
  sate_est <- summary(lm(main_vars[,i]~treatment,data = d))$coefficients[2,1:2]
  if(i==1) exr_out <- exr(outcome = "redist_self", treatment = "treatment", covariates = c("age", "male", "edu","employed", "married", "st","christian","pre_income","pre_wages","pre_material"), data = d,sate_estimate = sate_est) 
  if(i==2) exr_out <- exr(outcome = "redist_hh", treatment = "treatment", covariates = c("age", "male", "edu","employed", "married", "st","christian","pre_income","pre_wages","pre_material"), data = d,sate_estimate = sate_est) 
  if(i==3) exr_out <- exr(outcome = "econ_standing", treatment = "treatment", covariates = c("age", "male", "edu","employed", "married", "st","christian","pre_income","pre_wages","pre_material"), data = d,sate_estimate = sate_est) 
  tau_estimates <- exr_out$tau
  heterogeneous_effects[1:length(tau_estimates),i] <- matrix(tau_estimates,ncol=1)
}
heterogeneous_effects[heterogeneous_effects==0] <- NA

mat_he <- NULL
for(i in 1:ncol(heterogeneous_effects)){
  mat_he <- rbind(mat_he, cbind(main_var_names[i],heterogeneous_effects[!is.na(heterogeneous_effects[,i]),i]))
}
heterogeneous_effects <- data.frame(mat_he)
colnames(heterogeneous_effects) <- c("Variable","Tau")
heterogeneous_effects$Variable <- factor(heterogeneous_effects$Variable,levels=main_var_names)
heterogeneous_effects[,2] <- as.numeric(heterogeneous_effects[,2])
min(heterogeneous_effects[,2])

pdf("Figures/Figure_G8.pdf", width = 4, height = 3)
ggplot(heterogeneous_effects, aes(x=Tau)) + facet_wrap(~Variable,ncol=1) +
  geom_histogram(binwidth = .05) + theme(text=element_text(family="serif")) + geom_vline(xintercept = 0)+
  scale_x_continuous(limits = c(-0.5,1)) + xlab("Estimated Treatment Effects") + ylab("")
dev.off()


